# Import packages
#install.packages("corrplot")
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(data.table)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
data.table 1.14.6 using 4 threads (see ?getDTthreads).  Latest news: r-datatable.com

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last
library(ggplot2)
library(pastecs)

Attaching package: ‘pastecs’

The following objects are masked from ‘package:data.table’:

    first, last

The following objects are masked from ‘package:dplyr’:

    first, last
library(corrplot)
corrplot 0.92 loaded
#library(ggthemes) # For appearance of plot like theme in ggplot2
# Environment setup ----
# The workspace reset and the machine-specific working directory below are
# kept commented out.
# remove(list=ls())
# setwd("C:\\Users\\sunil\\Downloads\\College\\DAV\\Project")
# Print large numbers in full instead of exponential (scientific) notation.
options(scipen = 999)

# Import dataset ----
# The indicator file is read with its first four lines skipped, so the
# header is taken from the fifth line. Strings stay character vectors.
nepal_dt <- read.csv(
  "Source Dataset-API_NPL_DS2.csv",
  skip = 4,
  header = TRUE,
  stringsAsFactors = FALSE
)
meta_country <- read.csv(
  "MetaData_Country.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
meta_indictr <- read.csv(
  "MetaData_Indicator.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Print the three tables for a quick visual check.
nepal_dt
meta_country
meta_indictr

Data Preparation: Preparing data after the import

# Start the working subset with every indicator whose name or code contains
# the literal substring "tax" (compared in lower case).
temp_df <- filter(
  nepal_dt,
  grepl("tax", tolower(IndicatorName), fixed = TRUE) |
    grepl("tax", tolower(IndicatorCode), fixed = TRUE)
)
nepal_df <- temp_df
nepal_df
# Rows and columns of the tax subset.
dim(nepal_df)
[1] 53 66
# Append every indicator whose name or code contains the literal substring
# "gdp" (compared in lower case) to the working subset.
# NOTE(review): an indicator that matches both "tax" and "gdp" is appended a
# second time here; GC.TAX.TOTL.GD.ZS shows up twice in the column listing
# further down. Confirm whether the duplicate should be dropped.
temp_df <- filter(
  nepal_dt,
  grepl("gdp", tolower(IndicatorName), fixed = TRUE) |
    grepl("gdp", tolower(IndicatorCode), fixed = TRUE)
)
nepal_df <- rbind(nepal_df, temp_df)
nepal_df
# Rows and columns after adding the GDP indicators.
dim(nepal_df)
[1] 143  66
# Append every indicator whose name or code contains the literal substring
# "employment" (compared in lower case) to the working subset.
temp_df <- filter(
  nepal_dt,
  grepl("employment", tolower(IndicatorName), fixed = TRUE) |
    grepl("employment", tolower(IndicatorCode), fixed = TRUE)
)
nepal_df <- rbind(nepal_df, temp_df)
nepal_df
# Drop the first and second columns.
# NOTE(review): presumably the country name and country code; what remains
# starts with the indicator name and indicator code columns, followed by the
# yearly values. Confirm against the CSV header.
nepal_df <- nepal_df[-c(1, 2)]
nepal_df
# unique(nepal_df$IndicatorName)
# table(tolower(nepal_df$IndicatorName))

# Transpose the data frame so that every original column becomes a row and
# every indicator becomes a column.
# df_t <- (t(nepal_df))
df_t <- transpose(nepal_df)
# transpose() does not carry names over: the old column names become the row
# names, and the old row names serve as provisional column names.
rownames(df_t) <- colnames(nepal_df)
colnames(df_t) <- rownames(nepal_df)
# View(df_t)
df_t[0, ]
# Name the columns after the SECOND row, which holds the indicator codes
# (the column listing printed further down confirms codes are used).
# unlist() turns the one-row data frame into a plain character vector rather
# than relying on implicit list-to-character coercion.
colnames(df_t) <- unlist(df_t[2, ], use.names = FALSE)
# Remove the first and second rows; only the per-year rows remain.
df_t <- df_t[-(1:2), ]
nepal_df <- df_t
View(nepal_df)
# Keep the row names as a first column called YEAR.
# setDT(df_t, keep.rownames = TRUE)[]
nepal_df <- cbind(YEAR = rownames(nepal_df), nepal_df)
# Strip the leading "X" from the YEAR labels. The pattern is anchored so that
# only a prefix is removed.
nepal_df$YEAR <- sub("^X", "", as.character(nepal_df$YEAR))
nepal_df
# Number of columns (YEAR plus one per selected indicator).
dim(nepal_df)[2]
[1] 243
nepal_df
# Convert every column (YEAR included) to numeric.
# `nepal_df[] <- lapply(...)` replaces the columns in place and keeps the
# data frame structure. It avoids the intermediate matrix that sapply()
# builds and the `1:ncol` index, which misbehaves on a zero-column input.
# Values that cannot be parsed become NA (with a warning), as before.
#nepal_df$TM.TAX.MRCH.WM.AR.ZS = as.numeric(as.character(nepal_df$TM.TAX.MRCH.WM.AR.ZS))
#nepal_df$NY.GDP.PETR.RT.ZS = as.numeric(as.character(nepal_df$NY.GDP.PETR.RT.ZS))
nepal_df[] <- lapply(nepal_df, as.numeric)
# Show the resulting class of each column; vapply() guarantees a character
# vector with one entry per column.
vapply(nepal_df, function(column) class(column)[1], character(1))
                    YEAR     TM.TAX.MRCH.WM.AR.ZS        TM.TAX.MRCH.IP.ZS           NY.TAX.NIND.KN 
               "numeric"                "numeric"                "numeric"                "numeric" 
       TM.TAX.TCOM.BC.ZS        TM.TAX.MANF.BC.ZS        GC.TAX.INTT.RV.ZS     TM.TAX.MRCH.WM.FN.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
    TM.TAX.MRCH.SM.AR.ZS        TM.TAX.TCOM.IP.ZS        TM.TAX.MANF.IP.ZS           IC.TAX.GIFT.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       GC.TAX.TOTL.GD.ZS        GC.TAX.GSRV.VA.ZS        IC.TAX.LABR.CP.ZS           GC.TAX.YPKG.CN 
               "numeric"                "numeric"                "numeric"                "numeric" 
       TM.TAX.MRCH.BR.ZS           NY.TAX.NIND.CN        TM.TAX.MRCH.SR.ZS        IC.TAX.OTHR.CP.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          GC.TAX.YPKG.ZS           GC.TAX.IMPT.ZS           GC.TAX.OTHR.CN           GC.TAX.IMPT.CN 
               "numeric"                "numeric"                "numeric"                "numeric" 
    TM.TAX.TCOM.WM.AR.ZS     TM.TAX.MANF.WM.AR.ZS              IC.TAX.PAYM           GC.TAX.EXPT.CN 
               "numeric"                "numeric"                "numeric"                "numeric" 
       IC.TAX.TOTL.CP.ZS           IC.FRM.INFM.ZS           GC.TAX.GSRV.CN           GC.TAX.INTT.CN 
               "numeric"                "numeric"                "numeric"                "numeric" 
    TM.TAX.TCOM.WM.FN.ZS     TM.TAX.MANF.WM.FN.ZS     TM.TAX.MRCH.SM.FN.ZS     TM.TAX.TCOM.SM.AR.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
    TM.TAX.MANF.SM.AR.ZS           IC.FRM.METG.ZS        GC.TAX.GSRV.RV.ZS        TM.TAX.MRCH.BC.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.TAX.NIND.CD     TM.TAX.TCOM.SM.FN.ZS     TM.TAX.MANF.SM.FN.ZS              IC.TAX.METG 
               "numeric"                "numeric"                "numeric"                "numeric" 
       GC.TAX.YPKG.RV.ZS              IC.TAX.DURS           GC.TAX.TOTL.CN        TM.TAX.TCOM.BR.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       TM.TAX.MANF.BR.ZS        TM.TAX.TCOM.SR.ZS        TM.TAX.MANF.SR.ZS        IC.TAX.PRFT.CP.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          GC.TAX.EXPT.ZS        GC.TAX.OTHR.RV.ZS        TG.VAL.TOTL.GD.ZS           NY.GDP.MKTP.KD 
               "numeric"                "numeric"                "numeric"                "numeric" 
       NY.GDP.COAL.RT.ZS        NY.GDP.PCAP.PP.KD        NY.GDP.MINR.RT.ZS           NY.GDP.MKTP.KN 
               "numeric"                "numeric"                "numeric"                "numeric" 
    NY.GDP.DEFL.KD.ZG.AD           NV.SRV.TOTL.ZS        ER.GDP.FWTL.M3.KD     BX.TRF.PWKR.DT.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.GDP.PCAP.EM.KD        SE.XPD.TERT.PC.ZS           NY.GDS.TOTL.ZS        NY.GDP.MKTP.KD.ZG 
               "numeric"                "numeric"                "numeric"                "numeric" 
       NY.GDP.DEFL.KD.ZG        SH.XPD.CHEX.GD.ZS        SE.XPD.PRIM.PC.ZS        NY.GDP.PETR.RT.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.GDP.MKTP.CD           NE.DAB.TOTL.ZS        SH.XPD.GHED.GD.ZS        SE.XPD.TOTL.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          PA.NUS.PPPC.RF        NY.GDP.MKTP.PP.KD        NY.GDP.DEFL.ZS.AD           NE.GDI.TOTL.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       GC.TAX.TOTL.GD.ZS        FS.AST.DOMS.GD.ZS        FM.AST.PRVT.GD.ZS        EN.ATM.CO2E.KD.GD 
               "numeric"                "numeric"                "numeric"                "numeric" 
       NY.GDP.PCAP.PP.CD        NY.GDP.FRST.RT.ZS           NE.GDI.FTOT.ZS        SE.XPD.SECO.PC.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       NY.GDP.MKTP.CN.AD           NV.IND.MANF.ZS           NE.TRD.GNFS.ZS        GC.REV.XGRT.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       GB.XPD.RSDV.GD.ZS     EG.USE.COMM.GD.PP.KD        GC.NLD.TOTL.GD.ZS        BN.CAB.XOKA.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       BG.GSR.NFSV.GD.ZS           NE.CON.PRVT.ZS        GC.LBL.TOTL.GD.ZS        FS.AST.PRVT.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
    BM.KLT.DINV.WD.GD.ZS           NY.GDP.PCAP.KD           NY.GDP.FCST.CN        FS.AST.CGOV.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       EN.ATM.CO2E.PP.GD     EG.GDP.PUSE.KO.PP.KD        EG.EGY.PRIM.PP.KD        GC.NFN.TOTL.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       FM.LBL.BMNY.GD.ZS        NY.GDP.PCAP.KD.ZG           NY.GDP.FCST.KD        NY.GDP.TOTL.RT.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.GDP.MKTP.CN           NE.RSB.GNFS.ZS        MS.MIL.XPND.GD.ZS        NY.GDP.NGAS.RT.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.GDP.DISC.CN           NV.IND.TOTL.ZS           NE.GDI.FPRV.ZS        GC.DOD.TOTL.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       FS.AST.DOMO.GD.ZS     EN.ATM.CO2E.PP.GD.KD     BX.KLT.DINV.WD.GD.ZS           NY.GDP.PCAP.KN 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.GDP.FCST.KN           NE.IMP.GNFS.ZS           NY.GNS.ICTR.ZS           NY.GDP.PCAP.CD 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.GDP.DISC.KN           NV.AGR.TOTL.ZS        CM.MKT.TRAD.GD.ZS        CM.MKT.LCAP.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
              PA.NUS.PPP        NY.GDP.MKTP.PP.CD           NY.GDP.DEFL.ZS           NE.EXP.GNFS.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          NY.GDP.PCAP.CN           NY.GDP.FCST.CD           NE.CON.TOTL.ZS        GC.AST.TOTL.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       EG.GDP.PUSE.KO.PP           NE.CON.GOVT.ZS        GC.XPN.TOTL.GD.ZS        FD.AST.PRVT.GD.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          SL.UEM.NEET.ZS        SL.UEM.1524.FE.ZS           SL.SRV.EMPL.ZS           SL.FAM.WORK.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
    SL.EMP.TOTL.SP.FE.ZS        SL.AGR.EMPL.MA.ZS  per_lm_alllm.cov_q5_tot        SL.UEM.INTM.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          SL.TLF.PART.ZS     SL.TLF.0714.WK.MA.ZS        SL.SRV.0714.MA.ZS        SL.FAM.0714.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.EMP.SELF.MA.ZS        SL.AGR.0714.FE.ZS  per_lm_alllm.cov_q1_tot        SL.UEM.TOTL.FE.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.UEM.1524.MA.ZS        SL.TLF.0714.MA.ZS        SL.IND.EMPL.FE.ZS     SL.EMP.TOTL.SP.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
 SL.EMP.1524.SP.FE.NE.ZS     SL.UEM.TOTL.FE.NE.ZS     SL.UEM.1524.MA.NE.ZS        SL.TLF.0714.FE.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
 SL.EMP.TOTL.SP.MA.NE.ZS           SL.AGR.EMPL.ZS           SL.UEM.INTM.ZS           SL.SRV.0714.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          SL.FAM.0714.ZS           SL.EMP.SELF.ZS        SL.AGR.0714.MA.ZS  per_lm_alllm.cov_q2_tot 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.UEM.TOTL.MA.ZS           SL.UEM.1524.ZS     SL.TLF.0714.SW.FE.ZS           SL.IND.EMPL.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.EMP.TOTL.SP.ZS  SL.EMP.1524.SP.MA.NE.ZS        SL.UEM.INTM.FE.ZS        SL.TLF.PART.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.SRV.0714.FE.ZS        SL.FAM.0714.FE.ZS        SL.EMP.SELF.FE.ZS per_lm_alllm.cov_pop_tot 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.UEM.NEET.MA.ZS     SL.UEM.1524.FE.NE.ZS           SL.TLF.0714.ZS        SL.SRV.EMPL.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.FAM.WORK.MA.ZS  SL.EMP.TOTL.SP.FE.NE.ZS        SL.AGR.EMPL.FE.ZS  per_lm_alllm.cov_q4_tot 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.WAG.0714.MA.ZS        SL.UEM.BASC.FE.ZS        SL.TLF.0714.SW.ZS        SL.SLF.0714.FE.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.EMP.WORK.FE.ZS        SL.EMP.MPYR.FE.ZS           SL.WAG.0714.ZS        SL.UEM.BASC.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.SLF.0714.MA.ZS        SL.EMP.WORK.MA.ZS        SL.EMP.MPYR.MA.ZS per_lm_alllm.adq_pop_tot 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.UEM.NEET.FE.ZS        SL.TLF.0714.WK.ZS        SL.SRV.EMPL.FE.ZS        SL.FAM.WORK.FE.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.EMP.SMGT.FE.ZS           SL.AGR.0714.ZS  per_lm_alllm.cov_q3_tot        SL.UEM.TOTL.NE.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.UEM.ADVN.FE.ZS        SL.MNF.0714.FE.ZS        SL.EMP.VULN.FE.ZS     SL.EMP.1524.SP.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          SL.UEM.BASC.ZS        SL.TLF.PART.FE.ZS     SL.TLF.0714.WK.FE.ZS           SL.SLF.0714.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          SL.EMP.WORK.ZS           SL.EMP.MPYR.ZS  per_lm_alllm.ben_q1_tot           SL.UEM.TOTL.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
       SL.UEM.ADVN.MA.ZS     SL.TLF.0714.SW.MA.ZS        SL.MNF.0714.MA.ZS        SL.EMP.VULN.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
    SL.EMP.1524.SP.NE.ZS     SL.UEM.TOTL.MA.NE.ZS        SL.UEM.1524.NE.ZS        SL.IND.EMPL.MA.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
    SL.EMP.TOTL.SP.NE.ZS     SL.EMP.1524.SP.FE.ZS        SL.WAG.0714.FE.ZS           SL.UEM.ADVN.ZS 
               "numeric"                "numeric"                "numeric"                "numeric" 
          SL.MNF.0714.ZS           SL.EMP.VULN.ZS        SL.EMP.1524.SP.ZS 
               "numeric"                "numeric"                "numeric" 
# Fill missing values ----
# Per-column variants, kept for reference:
#nepal_df["TM.TAX.MRCH.WM.AR.ZS"][is.na(nepal_df["TM.TAX.MRCH.WM.AR.ZS"])] <- 0
#nepal_df["NY.GDP.PETR.RT.ZS"][is.na(nepal_df["NY.GDP.PETR.RT.ZS"])] <- 0
# Every NA cell in the whole data frame is overwritten with 0.
# NOTE(review): these zeros are treated as real observations by the summary
# statistics, correlations and plots computed later. Confirm this is intended.
nepal_df <- replace(nepal_df, is.na(nepal_df), 0)
nepal_df
# Open the prepared data in the data viewer.
View(nepal_df)

Parameter Selection:

## Sample parameters selection to achieve project objective.
# GC.TAX.GSRV.VA.ZS -> Taxes on goods and services(%)
# GC.TAX.GSRV.CN -> Taxes on goods and services (current LCU)
# GC.TAX.TOTL.GD.ZS -> Tax revenue (% of GDP)
# IC.TAX.LABR.CP.ZS -> Labor tax and contributions (% of commercial profits) | Labor tax and contributions is the amount of taxes and mandatory contributions on labor paid by the business.
# GC.TAX.YPKG.CN -> Taxes on income, profits and capital gains (current LCU)
# GC.TAX.IMPT.ZS -> Customs and other import duties (% of tax revenue)
# GC.TAX.IMPT.CN -> Customs and other import duties (current LCU)
# GC.TAX.EXPT.ZS -> Taxes on exports (% of tax revenue)
# GC.TAX.EXPT.CN -> Taxes on exports (current LCU)
# IC.TAX.TOTL.CP.ZS -> Total tax and contribution rate (% of profit)
# NY.GDP.MKTP.KD -> GDP (constant 2015 US$)
# NY.GDP.MKTP.KD.ZG -> GDP growth (annual %)
# SL.IND.EMPL.ZS -> Employment in industry (% of total employment) (modeled ILO estimate)
# SL.IND.EMPL.FE.ZS -> Employment in industry, female (% of female employment) (modeled ILO estimate)
# SL.IND.EMPL.MA.ZS -> Employment in industry, male (% of male employment) (modeled ILO estimate)
# SL.AGR.EMPL.ZS -> Employment in agriculture (% of total employment) (modeled ILO estimate)
# SL.AGR.EMPL.FE.ZS -> Employment in agriculture, female (% of female employment) (modeled ILO estimate)
# SL.AGR.EMPL.MA.ZS -> Employment in agriculture, male (% of male employment) (modeled ILO estimate)
## Sample parameter selection to achieve project objective.
# GC.TAX.GSRV.VA.ZS, NY.GDP.MKTP.KD  0.8481471
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.ZS  0.8880489
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.FE.ZS 0.8928028
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.MA.ZS 0.8939309
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.ZS 0.8268747
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.FE.ZS 0.8333567
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.MA.ZS 0.8062022
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.ZS 0.727295
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.FE.ZS 0.7059692
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.MA.ZS 0.7179946
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.ZS 0.893035
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.FE.ZS 0.8984195
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.MA.ZS 0.8992892
# IC.TAX.LABR.CP.ZS
# GC.TAX.YPKG.CN
# GC.TAX.IMPT.ZS
# GC.TAX.EXPT.CN
# IC.TAX.TOTL.CP.ZS
## Keep only YEAR and the indicators chosen for the project objective.
selected_indicators <- c(
  "YEAR",
  # Tax indicators
  "GC.TAX.GSRV.VA.ZS", "GC.TAX.GSRV.CN", "GC.TAX.TOTL.GD.ZS",
  "IC.TAX.LABR.CP.ZS", "GC.TAX.YPKG.CN", "GC.TAX.IMPT.ZS",
  "GC.TAX.IMPT.CN", "GC.TAX.EXPT.ZS", "GC.TAX.EXPT.CN",
  "IC.TAX.TOTL.CP.ZS",
  # GDP indicators
  "NY.GDP.MKTP.KD", "NY.GDP.MKTP.KD.ZG",
  # Employment indicators
  "SL.IND.EMPL.ZS", "SL.IND.EMPL.FE.ZS", "SL.IND.EMPL.MA.ZS",
  "SL.AGR.EMPL.ZS", "SL.AGR.EMPL.FE.ZS", "SL.AGR.EMPL.MA.ZS"
)
nepal_df <- select(nepal_df, all_of(selected_indicators))
nepal_df

Data Quality: Checking the data

## Checking quality of data in parameters selected.
# Earlier attempts at a more compact / transposed summary view, kept for
# reference:
#View(truncate(summary(nepal_df)))
#df_t <- summary(nepal_df)
#View(t(df_t))
# Open the per-column summary() table in the data viewer.
View(summary(nepal_df))
# Print descriptive statistics for every column using stat.desc()
# (from the pastecs package attached at the top of the notebook).
stat.desc(nepal_df)

Correlation Analysis: Exploring relationship between employment, tax and GDP. Understanding what drives economic activity.

# Correlation between every pair of columns in the data frame.
# The matrix is computed once and reused for both the viewer and the plot.
# cor(nepal_df$TM.TAX.MRCH.WM.AR.ZS, nepal_df$NY.GDP.PETR.RT.ZS)
# cor(nepal_df$GC.TAX.TOTL.GD.ZS, nepal_df$SL.IND.EMPL.FE.ZS)
corr_matrix <- cor(nepal_df)
# The explicit title keeps the viewer tab labelled as before.
View(corr_matrix, title = "cor(nepal_df)")
# Correlation matrix plot (lower triangle only)
corrplot(corr_matrix, type = "lower")

# Variance of the taxes-on-goods-and-services share.
var(nepal_df$GC.TAX.GSRV.VA.ZS)
[1] 26.21113
# SL.IND.EMPL.ZS  NY.GDP.MKTP.KD

Time series analysis: Trends/patterns in the data over time

# autoregressive integrated moving average (ARIMA) - need to look at it
# GDP = Consumption + Investment + Government spending + Net exports

# Line + point chart of GC.TAX.GSRV.VA.ZS by YEAR.
# Columns are referenced by bare name inside aes(): ggplot2 looks them up in
# the `data` argument, and `nepal_df$...` inside aes() triggers a
# "use of `$` is discouraged" warning.
p <- ggplot(nepal_df, aes(x = YEAR, y = GC.TAX.GSRV.VA.ZS)) +
  geom_line(color = "steelblue") +
  geom_point() +
  xlab("YEAR") +
  ylab("Taxes on goods and services(%)") +
  ggtitle("Percent increase on tax on goods & services each year")
  #scale_x_date(limit=c(as.Date("1960-01-01"),as.Date("2022-12-30"))) +

p

  1. The percent increase in tax on goods and services remained around 5.5% from 1990 to 2005.
  2. The percent increase in tax on goods and services has been rising since 2005.
# Check tax and gdp over time
# Two series drawn against YEAR on one shared y scale; the secondary axis is
# an identity copy (~ . * 1) of the primary one and differs only in its label.
coeff <- 10 # assigned but not used by this plot
tax_color <- "black"
gdp_color <- "steelblue"
# Columns are referenced by bare name inside aes() instead of `nepal_df$...`,
# and line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = GC.TAX.GSRV.CN), linewidth = 0.5, color = tax_color) +
  geom_line(aes(y = NY.GDP.MKTP.KD), linewidth = 0.5, color = gdp_color) +
  geom_point(aes(y = GC.TAX.GSRV.CN), size = 2, color = tax_color) +
  geom_point(aes(y = NY.GDP.MKTP.KD), size = 2, color = gdp_color) +
  scale_y_continuous(
    # First axis
    name = "Taxes on goods and services (current LCU)",
    # Second axis
    sec.axis = sec_axis(~ . * 1, name = "GDP (constant 2015 US$)")
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    # Axis titles are coloured like the series they describe.
    axis.title.y = element_text(color = tax_color, size = 13),
    axis.title.y.right = element_text(color = gdp_color, size = 13)
  ) +
  ggtitle("Tax and GDP over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center
Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.

  1. While the tax on goods and services has increased since 2005, GDP has remained about the same.
  2. Though the correlation between the indicators (GC.TAX.GSRV.CN & NY.GDP.MKTP.KD) is [value missing], the tax appears to have had no impact on GDP growth over the years.
# Check employment in industry and agriculture over the years.
# Two series drawn against YEAR on one shared y scale; the secondary axis is
# an identity copy (~ . * 1) of the primary one and differs only in its label.
coeff <- 10 # assigned but not used by this plot
ind_color <- "black"
agr_color <- "steelblue"
# Columns are referenced by bare name inside aes() instead of `nepal_df$...`,
# and line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.ZS), linewidth = 0.5, color = ind_color) +
  geom_line(aes(y = SL.AGR.EMPL.ZS), linewidth = 0.5, color = agr_color) +
  geom_point(aes(y = SL.IND.EMPL.ZS), size = 2, color = ind_color) +
  geom_point(aes(y = SL.AGR.EMPL.ZS), size = 2, color = agr_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry (% of total employment)",
    # Second axis
    sec.axis = sec_axis(~ . * 1, name = "Employment in agriculture (% of total employment)")
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    # Axis titles are coloured like the series they describe.
    axis.title.y = element_text(color = ind_color, size = 13),
    axis.title.y.right = element_text(color = agr_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center

  1. After 1990, employment in the industrial sector has been increasing each year by [value missing].
  2. After 1990, employment in the agriculture sector has been decreasing each year by [value missing].
# Female employment in industry and agriculture over the years.
# Two series drawn against YEAR on one shared y scale; the secondary axis is
# an identity copy (~ . * 1) of the primary one and differs only in its label.
coeff <- 10 # assigned but not used by this plot
ind_color <- "black"
agr_color <- "steelblue"
# Columns are referenced by bare name inside aes() instead of `nepal_df$...`,
# and line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.FE.ZS), linewidth = 0.5, color = ind_color) +
  geom_line(aes(y = SL.AGR.EMPL.FE.ZS), linewidth = 0.5, color = agr_color) +
  geom_point(aes(y = SL.IND.EMPL.FE.ZS), size = 2, color = ind_color) +
  geom_point(aes(y = SL.AGR.EMPL.FE.ZS), size = 2, color = agr_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry, female (% of female employment)",
    # Second axis
    sec.axis = sec_axis(~ . * 1, name = "Employment in agriculture, female (% of female employment)")
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    # Axis titles are coloured like the series they describe.
    axis.title.y = element_text(color = ind_color, size = 13),
    axis.title.y.right = element_text(color = agr_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture, females over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center

  1. The percentage of female employment in industry has been increasing by [value missing].
  2. The percentage of female employment in agriculture has been decreasing by [value missing].
# Male employment in industry and agriculture over the years.
# Two series drawn against YEAR on one shared y scale; the secondary axis is
# an identity copy (~ . * 1) of the primary one and differs only in its label.
coeff <- 10 # assigned but not used by this plot
ind_color <- "black"
agr_color <- "steelblue"
# Columns are referenced by bare name inside aes() instead of `nepal_df$...`,
# and line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.MA.ZS), linewidth = 0.5, color = ind_color) +
  geom_line(aes(y = SL.AGR.EMPL.MA.ZS), linewidth = 0.5, color = agr_color) +
  geom_point(aes(y = SL.IND.EMPL.MA.ZS), size = 2, color = ind_color) +
  geom_point(aes(y = SL.AGR.EMPL.MA.ZS), size = 2, color = agr_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry, male (% of male employment)",
    # Second axis
    sec.axis = sec_axis(~ . * 1, name = "Employment in agriculture, male (% of male employment)")
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    # Axis titles are coloured like the series they describe.
    axis.title.y = element_text(color = ind_color, size = 13),
    axis.title.y.right = element_text(color = agr_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture, males over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center

  1. The percentage of male employment in industry has been increasing by [value missing].
  2. The percentage of male employment in agriculture has been decreasing by [value missing].

Regression:

#help(“scale_x_continuous”)

# GDP against taxes on goods and services (current LCU), with a smoothed trend.
ggplot(nepal_df, aes(x = GC.TAX.GSRV.CN, y = NY.GDP.MKTP.KD)) +
  geom_point() +
  # Smoothed trend curve with confidence band. This is a local (loess) fit,
  # not a straight regression line. The method and formula are the ones
  # geom_smooth() picks by default for fewer than 1,000 observations; stating
  # them makes the choice visible and silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Taxes on goods and services (current LCU)") +
  ylab("GDP (constant 2015 US$)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: GDP x taxes on goods & services")

  1. As taxes on goods and services increase, GDP increases as well.
  2. The correlation between the two indicators (GC.TAX.GSRV.CN and NY.GDP.MKTP.KD), [value missing], says the same.
# Checking GDP against the taxes-on-goods-and-services share (%),
# with a smoothed trend curve.
ggplot(nepal_df, aes(x = GC.TAX.GSRV.VA.ZS, y = NY.GDP.MKTP.KD)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Taxes on goods and services (% value added of industry and services)") +
  ylab("GDP (constant 2015 US$)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: GDP x taxes on goods & services")

  1. Taxes on goods and services above 10% shows increase in GDP.
  2. When taxes on goods and services are within the range 6.5% to 8.5%, the GDP has been fluctuating.
# Taxes-on-goods-and-services share (y) against employment in industry (x),
# with a smoothed trend curve.
ggplot(nepal_df, aes(x = SL.IND.EMPL.ZS, y = GC.TAX.GSRV.VA.ZS)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Employment in industry (% of total employment)") +
  ylab("Taxes on goods and services (% value added of industry and services)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: Tax on goods & services X Employment in industry")

  1. Increase in taxes on goods and services above 7% shows increase in percent of employment in industry.
  2. Likewise, the taxes around 7.5% shows percent of employment in industry from 2.5% to 10%.
# Check employment in agriculture against the taxes-on-goods-and-services
# share (%), with a smoothed trend curve.
ggplot(nepal_df, aes(x = GC.TAX.GSRV.VA.ZS, y = SL.AGR.EMPL.ZS)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Taxes on goods and services (% value added of industry and services)") +
  ylab("Employment in agriculture (% of total employment)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: Tax on goods & services X Employment in agriculture")

  1. Despite the correlation between the indicators (GC.TAX.GSRV.VA.ZS and SL.AGR.EMPL.ZS), an increase in taxes on goods and services shows only a slow decrease in employment in agriculture.
# Check employment in industry against customs/import duties (% of tax
# revenue), with a smoothed trend curve.
ggplot(nepal_df, aes(x = GC.TAX.IMPT.ZS, y = SL.IND.EMPL.ZS)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Customs and other import duties (% of tax revenue)") +
  ylab("Employment in industry (% of total employment)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: Customs Import duties X Employment in industry")

  1. As customs and import duties increase, employment in the industrial sector decreases.
  2. When customs and import duties are above 25%, we can see a steep decline in employment in industry.
# Check employment in agriculture against customs/import duties (% of tax
# revenue), with a smoothed trend curve.
ggplot(nepal_df, aes(x = GC.TAX.IMPT.ZS, y = SL.AGR.EMPL.ZS)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Customs and other import duties (% of tax revenue)") +
  ylab("Employment in agriculture (% of total employment)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: Customs Import duties X Employment in agriculture")

  1. Though the correlation between the two indicators (GC.TAX.IMPT.ZS and SL.AGR.EMPL.ZS) is [value missing], an increase in customs and import duties produces only a small change in employment in agriculture.
# Check employment in industry against taxes on exports (% of tax revenue),
# with a smoothed trend curve.
ggplot(nepal_df, aes(x = GC.TAX.EXPT.ZS, y = SL.IND.EMPL.ZS)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Taxes on exports (% of tax revenue)") +
  ylab("Employment in industry (% of total employment)") +
  scale_x_continuous() +
  scale_y_continuous() +
  ggtitle("Regression: Exports taxes vs Employment in Industry")

  1. As taxes on exports increase, employment in industry remains largely unchanged (decreasing only slightly).
# Check employment in agriculture against taxes on exports (% of tax
# revenue), with a smoothed trend curve.
ggplot(nepal_df, aes(x = GC.TAX.EXPT.ZS, y = SL.AGR.EMPL.ZS)) +
  geom_point() +
  # Local (loess) fit, not a straight regression line. The method and formula
  # are the ones geom_smooth() picks by default for fewer than 1,000
  # observations; stating them silences the start-up message.
  geom_smooth(method = "loess", formula = y ~ x) +
  xlab("Taxes on exports (% of tax revenue)") +
  ylab("Employment in agriculture (% of total employment)") +
  scale_x_continuous() +
  scale_y_continuous() +
  # The title names export *taxes*, matching the x axis and the companion
  # industry plot.
  ggtitle("Regression: Exports taxes vs Employment in Agriculture")

  1. As taxes on exports increase, employment in agriculture remains largely unchanged (increasing only slightly).

Bar Plots:

# Bar plot of GDP for each observed share of employment in industry.
# Bars are positioned and filled by the employment share itself.
# Columns are referenced by bare name inside aes(); `nepal_df$...` inside
# aes() is discouraged by ggplot2 and leaks into the legend title.
ggplot(
  nepal_df,
  aes(x = SL.IND.EMPL.ZS, y = NY.GDP.MKTP.KD, fill = SL.IND.EMPL.ZS)
) +
  # geom_col() is the shorthand for geom_bar(stat = "identity").
  geom_col(position = "dodge", width = 0.08) +
  #theme_bw() +
  xlab("Employment in industry (% of total employment)") +
  ylab("GDP (constant 2015 US$)") +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  ) +
  ggtitle("Bar plot: GDP vs Employment in industry")

  1. GDP is highest (around $30 billion) when employment in industry (% of total employment) is 15%.
  2. GDP is lowest (below $10 billion) when employment in industry (% of total employment) is 2.7%.
# Bar plot of GDP for each observed taxes-on-goods-and-services share.
# Bars are positioned and filled by the tax share itself.
# Columns are referenced by bare name inside aes(); `nepal_df$...` inside
# aes() is discouraged by ggplot2 and leaks into the legend title.
ggplot(
  nepal_df,
  aes(x = GC.TAX.GSRV.VA.ZS, y = NY.GDP.MKTP.KD, fill = GC.TAX.GSRV.VA.ZS)
) +
  # geom_col() is the shorthand for geom_bar(stat = "identity").
  geom_col(position = "dodge", width = 0.08) +
  #theme_bw() +
  xlab("Taxes on goods and services(%)") +
  ylab("GDP (constant 2015 US$)") +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  ) +
  ggtitle("Bar plot: GDP vs Taxes on goods & services(%)")

#GC.TAX.GSRV.VA.ZS, NY.GDP.MKTP.KD
  1. GDP is highest (around $30 billion) when taxes on goods and services are 10.6%.
  2. GDP is lowest (below $10 billion) when taxes on goods and services are 7%.

Cluster Analysis:

# Scatterplot for Taxes on goods and services and GDP
There were 15 warnings (use warnings() to see them)
# 3D scatterplot of tax (x), GDP (y) and employment in industry (z).
library(scatterplot3d)
scatterplot3d(
  x = nepal_df$GC.TAX.GSRV.CN,
  y = nepal_df$NY.GDP.MKTP.KD,
  z = nepal_df$SL.IND.EMPL.ZS,
  type = "h",
  main = "3D Scatterplot",
  xlab = "Tax",
  ylab = "GDP",
  zlab = "Employment"
)

#library(rgl)
#plot3d(nepal_df$GC.TAX.GSRV.CN, nepal_df$NY.GDP.MKTP.KD, nepal_df$SL.IND.EMPL.ZS)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

---
title: "Analyzing economic trends in Nepal"
output: html_notebook
---


```{r}
# Import packages

#install.packages("corrplot")
library(dplyr)
library(data.table)
library(ggplot2)
library(pastecs)
library(corrplot)
#library(ggthemes) # For appearance of plot like theme in ggplot2
```

```{r}
# Setting environment
# remove(list=ls())
# setwd("C:\\Users\\sunil\\Downloads\\College\\DAV\\Project")
# Keep R from printing large numbers in exponential (scientific) notation:
# a large `scipen` value biases printing towards fixed notation.
options(scipen = 999)
```

```{r}
# Import dataset
# Read the indicator table and the two metadata tables from the working
# directory. For the indicator file the first four lines are skipped, so the
# header row is taken from the first line after them.
nepal_dt <- read.csv(
  file = "Source Dataset-API_NPL_DS2.csv",
  header = TRUE,
  skip = 4,
  stringsAsFactors = FALSE
)
meta_country <- read.csv(
  file = "MetaData_Country.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
meta_indictr <- read.csv(
  file = "MetaData_Indicator.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Echo the three tables for a quick visual check.
nepal_dt
meta_country
meta_indictr
```


Data Preparation: Preparing data after the import

```{r}
# Start the working table with every indicator whose name or code contains
# "tax". Both fields are lower-cased first, so the literal (fixed) match is
# effectively case-insensitive.
temp_df <- filter(
  nepal_dt,
  grepl("tax", tolower(IndicatorName), fixed = TRUE) |
    grepl("tax", tolower(IndicatorCode), fixed = TRUE)
)
nepal_df <- temp_df
nepal_df
```

```{r}
# Number of rows and columns selected so far
dim(nepal_df)
```

```{r}
# Append every indicator whose name or code contains "gdp" (case-insensitive,
# literal match) to the working table.
# NOTE(review): rbind() does not de-duplicate, so a row that also matched an
# earlier keyword is appended a second time — confirm this is acceptable.
temp_df <- filter(
  nepal_dt,
  grepl("gdp", tolower(IndicatorName), fixed = TRUE) |
    grepl("gdp", tolower(IndicatorCode), fixed = TRUE)
)
nepal_df <- rbind(nepal_df, temp_df)
nepal_df
```

```{r}
# Number of rows and columns after appending the GDP indicators
dim(nepal_df)
```

```{r}
# Append every indicator whose name or code contains "employment"
# (case-insensitive, literal match) to the working table.
# NOTE(review): rbind() does not de-duplicate, so a row that also matched an
# earlier keyword is appended a second time — confirm this is acceptable.
temp_df <- filter(
  nepal_dt,
  grepl("employment", tolower(IndicatorName), fixed = TRUE) |
    grepl("employment", tolower(IndicatorCode), fixed = TRUE)
)
nepal_df <- rbind(nepal_df, temp_df)
nepal_df
```

```{r}
# Drop first and second column
# (negative positions remove those columns and keep all the others)
nepal_df <- nepal_df[-(1:2)]
nepal_df
```

```{r}
# unique(nepal_df$IndicatorName)
#table(tolower(nepal_df$IndicatorName))
```

```{r}
# Transposing the dataframe: indicators become columns, the former columns
# become rows.
# NOTE(review): transpose() presumably resolves to data.table::transpose,
# since data.table is attached above — confirm.

# df_t <- (t(nepal_df))

df_t <- transpose(nepal_df)
# Carry the labels across: old column names label the rows, old row names
# label the columns.
row.names(df_t) <- names(nepal_df)
names(df_t) <- row.names(nepal_df)
#View(df_t)
```

```{r}
# Zero-row slice: shows only the column names of the transposed table
df_t[0,]
```

```{r}
# Rename the columns with the values held in the SECOND row of the transposed
# table; the positional names assigned during transposing are replaced.
# NOTE(review): row 2 presumably holds the indicator codes — confirm.
names(df_t) <- df_t[2, ]

# Remove the first and second row, which are no longer data rows.
df_t <- df_t[-c(1, 2), ]
nepal_df <- df_t
View(nepal_df)
```

```{r}
# Turn the row names into a leading column and call it YEAR.

#setDT(df_t, keep.rownames = TRUE)[]
nepal_df <- cbind(names = row.names(nepal_df), nepal_df)
names(nepal_df)[1] <- "YEAR"

# Strip every 'X' character from the YEAR labels.
# NOTE(review): presumably the X prefix was added to numeric headers on
# import — confirm.
nepal_df[["YEAR"]] <- gsub("X", "", as.character(nepal_df[["YEAR"]]))
nepal_df
```

```{r}
# Number of columns (second element of the dimensions)
dim(nepal_df)[2]
```

```{r}
# Print the table for inspection
nepal_df
```

```{r}
# Converting columns to numeric types

#nepal_df$TM.TAX.MRCH.WM.AR.ZS = as.numeric(as.character(nepal_df$TM.TAX.MRCH.WM.AR.ZS))
#nepal_df$NY.GDP.PETR.RT.ZS = as.numeric(as.character(nepal_df$NY.GDP.PETR.RT.ZS))

# Convert every column in place. `nepal_df[] <- lapply(...)` keeps the data
# frame structure (row names, column names) and always yields one vector per
# column, whereas sapply() changes its return shape with the input size.
# Values that cannot be parsed as numbers become NA (with a warning).
nepal_df[] <- lapply(nepal_df, as.numeric)

# Confirm the resulting class of each column.
vapply(nepal_df, class, character(1))
```

```{r}
# Replace NA values with 0
#nepal_df["TM.TAX.MRCH.WM.AR.ZS"][is.na(nepal_df["TM.TAX.MRCH.WM.AR.ZS"])] <- 0
#nepal_df["NY.GDP.PETR.RT.ZS"][is.na(nepal_df["NY.GDP.PETR.RT.ZS"])] <- 0

# Replace every NA in the whole data frame with 0 using is.na().
# NOTE(review): after this step a missing observation cannot be told apart
# from a true 0, so these zeros enter every later statistic and plot computed
# from nepal_df (summary, cor, var, charts) — confirm this is intended.
nepal_df[is.na(nepal_df)] <- 0
```

```{r}
# Print the table after the NA replacement
nepal_df
```

```{r}
# Open the prepared data frame in the data viewer for a final visual check.
# NOTE(review): View() presumably needs an interactive session — confirm this
# chunk is not expected to run during a non-interactive render.
View(nepal_df)
```


Parameter Selection: 

```{r}
## Sample parameters selection to achieve project objective.
# GC.TAX.GSRV.VA.ZS -> Taxes on goods and services(%)
# GC.TAX.GSRV.CN -> Taxes on goods and services (current LCU)
# GC.TAX.TOTL.GD.ZS -> Tax revenue (% of GDP)
# IC.TAX.LABR.CP.ZS -> Labor tax and contributions (% of commercial profits) | Labor tax and contributions is the amount of taxes and mandatory contributions on labor paid by the business.
# GC.TAX.YPKG.CN -> Taxes on income, profits and capital gains (current LCU)
# GC.TAX.IMPT.ZS ->	Customs and other import duties (% of tax revenue)
# GC.TAX.IMPT.CN -> Customs and other import duties (current LCU)
# GC.TAX.EXPT.ZS ->	Taxes on exports (% of tax revenue)
# GC.TAX.EXPT.CN -> Taxes on exports (current LCU)
# IC.TAX.TOTL.CP.ZS -> Total tax and contribution rate (% of profit)

# NY.GDP.MKTP.KD -> GDP (constant 2015 US$)
# NY.GDP.MKTP.KD.ZG	-> GDP growth (annual %)

# SL.IND.EMPL.ZS ->	Employment in industry (% of total employment) (modeled ILO estimate)
# SL.IND.EMPL.FE.ZS -> Employment in industry, female (% of female employment) (modeled ILO estimate)
# SL.IND.EMPL.MA.ZS -> Employment in industry, male (% of male employment) (modeled ILO estimate)
# SL.AGR.EMPL.ZS -> Employment in agriculture (% of total employment) (modeled ILO estimate)
# SL.AGR.EMPL.FE.ZS -> Employment in agriculture, female (% of female employment) (modeled ILO estimate)
# SL.AGR.EMPL.MA.ZS -> Employment in agriculture, male (% of male employment) (modeled ILO estimate)
```

```{r}
## Sample parameter selection to achieve project objective.
# GC.TAX.GSRV.VA.ZS, NY.GDP.MKTP.KD  0.8481471
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.ZS  0.8880489
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.FE.ZS 0.8928028
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.MA.ZS 0.8939309
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.ZS 0.8268747
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.FE.ZS 0.8333567
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.MA.ZS 0.8062022
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.ZS 0.727295
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.FE.ZS 0.7059692
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.MA.ZS 0.7179946
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.ZS 0.893035
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.FE.ZS 0.8984195
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.MA.ZS 0.8992892
# IC.TAX.LABR.CP.ZS
# GC.TAX.YPKG.CN
# GC.TAX.IMPT.ZS
# GC.TAX.EXPT.CN
# IC.TAX.TOTL.CP.ZS

```

```{r}
## Keep only YEAR and the indicators chosen for the project objective.
nepal_df <- select(
  nepal_df,
  "YEAR",
  # Tax indicators
  "GC.TAX.GSRV.VA.ZS",
  "GC.TAX.GSRV.CN",
  "GC.TAX.TOTL.GD.ZS",
  "IC.TAX.LABR.CP.ZS",
  "GC.TAX.YPKG.CN",
  "GC.TAX.IMPT.ZS",
  "GC.TAX.IMPT.CN",
  "GC.TAX.EXPT.ZS",
  "GC.TAX.EXPT.CN",
  "IC.TAX.TOTL.CP.ZS",
  # GDP indicators
  "NY.GDP.MKTP.KD",
  "NY.GDP.MKTP.KD.ZG",
  # Employment indicators
  "SL.IND.EMPL.ZS",
  "SL.IND.EMPL.FE.ZS",
  "SL.IND.EMPL.MA.ZS",
  "SL.AGR.EMPL.ZS",
  "SL.AGR.EMPL.FE.ZS",
  "SL.AGR.EMPL.MA.ZS"
)
nepal_df
```

--------------------------------------------------------------------------------


Data Quality: Checking the data 

```{r}
## Checking quality of data in the selected parameters:
## open the per-column summary() table in the data viewer.
#View(truncate(summary(nepal_df)))
#df_t <- summary(nepal_df)
#View(t(df_t))
View(summary(nepal_df))
```

```{r}
# Descriptive statistics for every column (stat.desc, presumably from pastecs)
stat.desc(nepal_df)
```

--------------------------------------------------------------------------------


Correlation Analysis: Exploring relationship between employment, tax and GDP. Understanding what drives economic activity.

```{r}
# Finding the correlation between every pair of columns in the data frame.
# The whole frame is passed to cor(), so YEAR is part of the matrix as well.

# cor(nepal_df$TM.TAX.MRCH.WM.AR.ZS, nepal_df$NY.GDP.PETR.RT.ZS)
# cor(nepal_df$GC.TAX.TOTL.GD.ZS, nepal_df$SL.IND.EMPL.FE.ZS)

View(cor(nepal_df))
```

```{r}
# Correlation matrix plot of all columns in nepal_df; type = "lower" asks
# corrplot for the lower triangle of the matrix.

corrplot(cor(nepal_df), type="lower")
```

```{r}
# Variance of taxes on goods and services (%)
var(nepal_df$GC.TAX.GSRV.VA.ZS)
# Other candidate columns: SL.IND.EMPL.ZS  NY.GDP.MKTP.KD
```

--------------------------------------------------------------------------------


Time series analysis: Trends/patterns in the data over time

```{r fig.height = 4, fig.width = 11}
# autoregressive integrated moving average (ARIMA) - need to look at it
# GDP = Consumption + Investment + Government spending + Net exports

# Line-and-point chart of taxes on goods and services (%) per YEAR.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.
p <- ggplot(nepal_df, aes(x = YEAR, y = GC.TAX.GSRV.VA.ZS)) +
  geom_line(color = "steelblue") +
  geom_point() +
  xlab("YEAR") +
  ylab("Taxes on goods and services(%)") +
  ggtitle("Percent increase on tax on goods & services each year")
  #scale_x_date(limit=c(as.Date("1960-01-01"),as.Date("2022-12-30"))) +

p
```
  1. The percent increase in tax on goods and services remained around 5.5% from 1990 to 2005.
  2. The percent increase in tax on goods and services has been rising since 2005.
  

```{r fig.height = 6, fig.width = 14}

# Check tax and gdp over time
# Both series are drawn against the same y scale; the secondary axis uses an
# identity transform (~ . * 1), so it only adds a second axis title on the
# right-hand side and does not rescale GDP.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.

tax_color <- "black"
gdp_color <- "steelblue"

ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = GC.TAX.GSRV.CN), size = 0.5, color = tax_color) +
  geom_line(aes(y = NY.GDP.MKTP.KD), size = 0.5, color = gdp_color) +
  geom_point(aes(y = GC.TAX.GSRV.CN), size = 2, color = tax_color) +
  geom_point(aes(y = NY.GDP.MKTP.KD), size = 2, color = gdp_color) +
  scale_y_continuous(
    # First axis
    name = "Taxes on goods and services (current LCU)",
    # Second axis
    sec.axis = sec_axis(~ . * 1, name = "GDP (constant 2015 US$)")
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    axis.title.y = element_text(color = tax_color, size = 13),
    axis.title.y.right = element_text(color = gdp_color, size = 13)
  ) +
  ggtitle("Tax and GDP over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center

```
  1. Although the tax on goods and services has increased since 2005, GDP has remained about the same.
  2. Though the correlation between the indicators (GC.TAX.GSRV.CN & NY.GDP.MKTP.KD) is [TODO: insert value], the tax has had no visible impact on GDP growth over the years.
  
  
```{r}
# Check employment in industry and agriculture over the years.
# Both series are drawn against the same y scale; the secondary axis uses an
# identity transform (~ . * 1), so it only adds a second axis title on the
# right-hand side.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.

ind_color <- "black"
agr_color <- "steelblue"

ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.ZS), size = 0.5, color = ind_color) +
  geom_line(aes(y = SL.AGR.EMPL.ZS), size = 0.5, color = agr_color) +
  geom_point(aes(y = SL.IND.EMPL.ZS), size = 2, color = ind_color) +
  geom_point(aes(y = SL.AGR.EMPL.ZS), size = 2, color = agr_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry (% of total employment)",
    # Second axis
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Employment in agriculture (% of total employment)"
    )
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    axis.title.y = element_text(color = ind_color, size = 13),
    axis.title.y.right = element_text(color = agr_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center

```
  1. Since 1990, employment in the industrial sector has been increasing each year by [TODO: insert rate].
  2. Since 1990, employment in the agriculture sector has been decreasing each year by [TODO: insert rate].
  
  
```{r}
# Female employment in industry and in agriculture over the years.
# Both series are drawn against the same y scale; the secondary axis uses an
# identity transform (~ . * 1), so it only adds a second axis title on the
# right-hand side.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.
ind_color <- "black"
agr_color <- "steelblue"

ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.FE.ZS), size = 0.5, color = ind_color) +
  geom_line(aes(y = SL.AGR.EMPL.FE.ZS), size = 0.5, color = agr_color) +
  geom_point(aes(y = SL.IND.EMPL.FE.ZS), size = 2, color = ind_color) +
  geom_point(aes(y = SL.AGR.EMPL.FE.ZS), size = 2, color = agr_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry, female (% of female employment)",
    # Second axis
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Employment in agriculture, female (% of female employment)"
    )
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    axis.title.y = element_text(color = ind_color, size = 13),
    axis.title.y.right = element_text(color = agr_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture, females over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center
```
  1. The percentage of female employment in industry has been increasing by [TODO: insert rate].
  2. The percentage of female employment in agriculture has been decreasing by [TODO: insert rate].
  
  
```{r}
# Male employment in industry and in agriculture over the years.
# Both series are drawn against the same y scale; the secondary axis uses an
# identity transform (~ . * 1), so it only adds a second axis title on the
# right-hand side.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.
ind_color <- "black"
agr_color <- "steelblue"

ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.MA.ZS), size = 0.5, color = ind_color) +
  geom_line(aes(y = SL.AGR.EMPL.MA.ZS), size = 0.5, color = agr_color) +
  geom_point(aes(y = SL.IND.EMPL.MA.ZS), size = 2, color = ind_color) +
  geom_point(aes(y = SL.AGR.EMPL.MA.ZS), size = 2, color = agr_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry, male (% of male employment)",
    # Second axis
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Employment in agriculture, male (% of male employment)"
    )
  ) +
  #  theme_ipsum() +
  scale_x_continuous(name = "YEAR") +
  theme(
    axis.title.y = element_text(color = ind_color, size = 13),
    axis.title.y.right = element_text(color = agr_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture, males over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center
```
  1. The percentage of male employment in industry has been increasing by [TODO: insert rate].
  2. The percentage of male employment in agriculture has been decreasing by [TODO: insert rate].


```{r}

```

--------------------------------------------------------------------------------


Regression:

#help("scale_x_continuous")

```{r}
# GDP (y) against taxes on goods and services in current LCU (x):
# raw points plus a smoothed trend curve.
ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.GSRV.CN, y = NY.GDP.MKTP.KD)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Taxes on goods and services (current LCU)",
    y = "GDP (constant 2015 US$)",
    title = "Regression: GDP x taxes on goods & services"
  )
```
  1. As taxes on goods and services increase, GDP increases as well.
  2. The correlation between the two indicators (GC.TAX.GSRV.CN and NY.GDP.MKTP.KD), [TODO: insert value], says the same.
  

```{r}
# GDP (y) against taxes on goods and services as a percentage (x):
# raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.GSRV.VA.ZS, y = NY.GDP.MKTP.KD)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Taxes on goods and services (% value added of industry and services)",
    y = "GDP (constant 2015 US$)",
    title = "Regression: GDP x taxes on goods & services"
  )
```
  1. Taxes on goods and services above 10% shows increase in GDP.
  2. When taxes on goods and services are within the range 6.5% to 8.5%, the GDP has been fluctuating.
  
  
```{r}
# Taxes on goods and services as a percentage (y) against employment in
# industry (x): raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = SL.IND.EMPL.ZS, y = GC.TAX.GSRV.VA.ZS)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Employment in industry (% of total employment)",
    y = "Taxes on goods and services (% value added of industry and services)",
    title = "Regression: Tax on goods & services X Employment in industry"
  )
```
  1. Increase in taxes on goods and services above 7% shows increase in percent of employment in industry.
  2. Likewise, the taxes around 7.5% shows percent of employment in industry from 2.5% to 10%.
  
  
```{r}
# Employment in agriculture (y) against taxes on goods and services as a
# percentage (x): raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.GSRV.VA.ZS, y = SL.AGR.EMPL.ZS)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Taxes on goods and services (% value added of industry and services)",
    y = "Employment in agriculture (% of total employment)",
    title = "Regression: Tax on goods & services X Employment in agriculture"
  )
```
  1. Despite the correlation between the indicators (GC.TAX.GSRV.VA.ZS and SL.AGR.EMPL.ZS), an increase in taxes on goods and services shows only a slow decrease in employment in agriculture.
  
  
```{r}
# Employment in industry (y) against customs and other import duties (x):
# raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.IMPT.ZS, y = SL.IND.EMPL.ZS)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Customs and other import duties (% of tax revenue)",
    y = "Employment in industry (% of total employment)",
    title = "Regression: Customs Import duties X Employment in industry"
  )
```
  1. As customs and import duties increase, employment in the industrial sector decreases.
  2. Once customs and import duties rise above 25%, there is a steep decline in employment in industry.


```{r}
# Employment in agriculture (y) against customs and other import duties (x):
# raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.IMPT.ZS, y = SL.AGR.EMPL.ZS)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Customs and other import duties (% of tax revenue)",
    y = "Employment in agriculture (% of total employment)",
    title = "Regression: Customs Import duties X Employment in agriculture"
  )
```
  1. Though the correlation between the two indicators (GC.TAX.IMPT.ZS and SL.AGR.EMPL.ZS) is [TODO: insert value], an increase in customs and import duties produces only a small change in employment in agriculture.


```{r}
# Employment in industry (y) against taxes on exports (x):
# raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.EXPT.ZS, y = SL.IND.EMPL.ZS)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Taxes on exports (% of tax revenue)",
    y = "Employment in industry (% of total employment)",
    title = "Regression: Exports taxes vs Employment in Industry"
  )
```
  1. As taxes on exports increase, employment in industry remains largely unchanged (with a slight downward trend).


```{r}
# Employment in agriculture (y) against taxes on exports (x):
# raw points plus a smoothed trend curve.

ggplot(
  data = nepal_df,
  mapping = aes(x = GC.TAX.EXPT.ZS, y = SL.AGR.EMPL.ZS)
) +
  geom_point() +
  geom_smooth() + # trend curve; method left at geom_smooth()'s default
  scale_x_continuous() +
  scale_y_continuous() +
  labs(
    x = "Taxes on exports (% of tax revenue)",
    y = "Employment in agriculture (% of total employment)",
    title = "Regression: Exports vs Employment in Agriculture"
  )
```
  1. As taxes on exports increase, employment in agriculture remains largely unchanged (with a slight upward trend).


```{r}

```

--------------------------------------------------------------------------------


Bar Plots:

```{r}
# Bar plot for GDP vs employment in industry; bars are filled by the same
# employment share.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.

ggplot(
  nepal_df,
  aes(x = SL.IND.EMPL.ZS, y = NY.GDP.MKTP.KD, fill = SL.IND.EMPL.ZS)
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(position = "dodge", width = 0.08) +
  #theme_bw() +
  xlab("Employment in industry (% of total employment)") +
  ylab("GDP (constant 2015 US$)") +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  ) +
  ggtitle("Bar plot: GDP vs Employment in industry")
```
  1. GDP is highest(around $30 billion) when employment in industry(% of total employment) is 15%.
  2. GDP is lowest(below $10 billion) when employment in industry(% of total employment) is 2.7%.


```{r}
# Bar plot for GDP vs taxes on goods and services; bars are filled by the
# same tax percentage.
# Columns are handed to aes() by bare name so ggplot2 looks them up in
# `nepal_df`. Writing `nepal_df$...` inside aes() is discouraged by ggplot2
# and raises a warning for every aesthetic.

ggplot(
  nepal_df,
  aes(x = GC.TAX.GSRV.VA.ZS, y = NY.GDP.MKTP.KD, fill = GC.TAX.GSRV.VA.ZS)
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(position = "dodge", width = 0.08) +
  #theme_bw() +
  xlab("Taxes on goods and services(%)") +
  ylab("GDP (constant 2015 US$)") +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  ) +
  ggtitle("Bar plot: GDP vs Taxes on goods & services(%)")

```
  1. GDP is highest (around $30 billion) when taxes on goods and services is 10.6%.
  2. GDP is lowest (below $10 billion) when taxes on goods and services is 7%.


```{r}

```

--------------------------------------------------------------------------------


Cluster Analysis:

```{r}
# 3D scatterplot of taxes on goods and services (x), GDP (y) and employment
# in industry (z); the axis titles are set explicitly.

library(scatterplot3d)
scatterplot3d(
  x = nepal_df$GC.TAX.GSRV.CN,
  y = nepal_df$NY.GDP.MKTP.KD,
  z = nepal_df$SL.IND.EMPL.ZS,
  type = "h",
  main = "3D Scatterplot",
  xlab = "Tax",
  ylab = "GDP",
  zlab = "Employment"
)
```

```{r}
#library(rgl)
#plot3d(nepal_df$GC.TAX.GSRV.CN, nepal_df$NY.GDP.MKTP.KD, nepal_df$SL.IND.EMPL.ZS)
```

```{r}

```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.


This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 
